{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "import lxml.etree as le \n",
    "import requests\n",
    "import ktool\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "url='https://www.runoob.com/html/html-tutorial.html'\n",
    "content=requests.get(url=url).content\n",
    "contentx=le.HTML(content)\n",
    "x='//div[@class=\"design\"]/a/text()'\n",
    "x2='//div[@class=\"design\"]/a/@href'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "rets=contentx.xpath(x)\n",
    "ret=contentx.xpath(x2)\n",
    "data=[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "for i,v in zip(rets,ret):\n",
    "#     print(i.strip(),url+v)\n",
    "    a=url+v\n",
    "    q=i.strip()\n",
    "    data.append(dict(名称=q,dict=a))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'名称': 'HTML 教程',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-tutorial.html'},\n",
       " {'名称': 'HTML 简介',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-intro.html'},\n",
       " {'名称': 'HTML 编辑器',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-editors.html'},\n",
       " {'名称': 'HTML 基础',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-basic.html'},\n",
       " {'名称': 'HTML 元素',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-elements.html'},\n",
       " {'名称': 'HTML 属性',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-attributes.html'},\n",
       " {'名称': 'HTML 标题',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-headings.html'},\n",
       " {'名称': 'HTML 段落',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-paragraphs.html'},\n",
       " {'名称': 'HTML 文本格式化',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-formatting.html'},\n",
       " {'名称': 'HTML 链接',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-links.html'},\n",
       " {'名称': 'HTML 头部',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-head.html'},\n",
       " {'名称': 'HTML CSS',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-css.html'},\n",
       " {'名称': 'HTML 图像',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-images.html'},\n",
       " {'名称': 'HTML 表格',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-tables.html'},\n",
       " {'名称': 'HTML 列表',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-lists.html'},\n",
       " {'名称': 'HTML 区块',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-blocks.html'},\n",
       " {'名称': 'HTML 布局',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-layouts.html'},\n",
       " {'名称': 'HTML 表单',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-forms.html'},\n",
       " {'名称': 'HTML 框架',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-iframes.html'},\n",
       " {'名称': 'HTML 颜色',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-colors.html'},\n",
       " {'名称': 'HTML 颜色名',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-colornames.html'},\n",
       " {'名称': 'HTML 颜色值',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-colorvalues.html'},\n",
       " {'名称': 'HTML 脚本',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-scripts.html'},\n",
       " {'名称': 'HTML 字符实体',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-entities.html'},\n",
       " {'名称': 'HTML URL',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-url.html'},\n",
       " {'名称': 'HTML 速查列表',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-quicklist.html'},\n",
       " {'名称': 'HTML 标签简写及全称',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-tag-name.html'},\n",
       " {'名称': 'HTML 总结',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-summary.html'},\n",
       " {'名称': 'XHTML 简介',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-xhtml.html'},\n",
       " {'名称': 'HTML5 教程',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-intro.html'},\n",
       " {'名称': 'HTML5 浏览器支持',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.htmlhtml5-browsers.html'},\n",
       " {'名称': 'HTML5 新元素',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-new-element.html'},\n",
       " {'名称': 'HTML5 Canvas',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-canvas.html'},\n",
       " {'名称': 'HTML5 内联 SVG',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-svg.html'},\n",
       " {'名称': 'HTML5 MathML',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.htmlhtml5-mathml.html'},\n",
       " {'名称': 'HTML5 拖放',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-draganddrop.html'},\n",
       " {'名称': 'HTML5 地理定位',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-geolocation.html'},\n",
       " {'名称': 'HTML5 Video(视频)',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-video.html'},\n",
       " {'名称': 'HTML5 Audio(音频)',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-audio.html'},\n",
       " {'名称': 'HTML5 Input 类型',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-form-input-types.html'},\n",
       " {'名称': 'HTML5 表单元素',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-form-elements.html'},\n",
       " {'名称': 'HTML5 表单属性',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-form-attributes.html'},\n",
       " {'名称': 'HTML5 语义元素',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-semantic-elements.html'},\n",
       " {'名称': 'HTML5 Web 存储',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-webstorage.html'},\n",
       " {'名称': 'HTML5 Web SQL',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.htmlhtml5-web-sql.html'},\n",
       " {'名称': 'HTML5 应用程序缓存',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-app-cache.html'},\n",
       " {'名称': 'HTML5 Web Workers',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-webworkers.html'},\n",
       " {'名称': 'HTML5 SSE',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-serversentevents.html'},\n",
       " {'名称': 'HTML5 WebSocket',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-websocket.html'},\n",
       " {'名称': 'HTML5 测验',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/quiz/html5-quiz.html'},\n",
       " {'名称': 'HTML(5) 代码规范',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html5-syntax.html'},\n",
       " {'名称': 'HTML 媒体(Media)',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-media.html'},\n",
       " {'名称': 'HTML 插件',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-object.html'},\n",
       " {'名称': 'HTML 音频(Audio)',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-sounds.html'},\n",
       " {'名称': 'HTML 视频（Video）播放',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-videos.html'},\n",
       " {'名称': 'HTML 实例',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/html/html-examples.html'},\n",
       " {'名称': 'HTML 标签列表(字母排序)',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/html-reference.html'},\n",
       " {'名称': 'HTML 标签列表（功能排序）',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/ref-byfunc.html'},\n",
       " {'名称': 'HTML 属性',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/ref-standardattributes.html'},\n",
       " {'名称': 'HTML 事件',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/ref-eventattributes.html'},\n",
       " {'名称': 'HTML 画布',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/ref-canvas.html'},\n",
       " {'名称': 'HTML 音频/视频',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/ref-av-dom.html'},\n",
       " {'名称': 'HTML 有效DOCTYPES',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/html-elementsdoctypes.html'},\n",
       " {'名称': 'HTML 颜色名',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/html-colorname.html'},\n",
       " {'名称': 'HTML 拾色器',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/html-colorpicker.html'},\n",
       " {'名称': 'HTML 字符集',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/charsets/html-charsets.html'},\n",
       " {'名称': 'HTML ASCII',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/html-ascii.html'},\n",
       " {'名称': 'HTML ISO-8859-1',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/ref-entities.html'},\n",
       " {'名称': 'HTML 符号',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/html-symbols.html'},\n",
       " {'名称': 'HTML URL 编码',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/html-urlencode.html'},\n",
       " {'名称': 'HTML 语言代码',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/html-language-codes.html'},\n",
       " {'名称': 'HTTP 消息',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/html-httpmessages.html'},\n",
       " {'名称': 'HTTP 方法',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/html-httpmethods.html'},\n",
       " {'名称': '键盘快捷键',\n",
       "  'dict': 'https://www.runoob.com/html/html-tutorial.html/tags/html-keyboardshortcuts.html'}]"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=pd.DataFrame(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.to_excel(writer,index=False,encoding='utf-8',sheet_name='Sheet1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "writer = pd.ExcelWriter('保存.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "writer.save()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
